Question 1:
The bumblebee bat (Kitti’s hog-nosed bat) is recognized as the world’s smallest mammal. This analysis examines weight measurements from a sample of these bats to test whether they come from a population with a mean weight of 1.8 grams.
Input the bumblebee bat weight data
# Create a single data frame for consistency
bat_data <- data.frame(weight = c(1.7, 1.6, 1.5, 2.0, 2.3, 1.6, 1.6, 1.8, 1.5, 1.7, 1.2, 1.4, 1.6, 1.6, 1.6))
Calculate comprehensive summary statistics
summary_stats <- data.frame(
n = length(bat_data$weight),
mean = mean(bat_data$weight),
median = median(bat_data$weight),
sd = sd(bat_data$weight),
se = sd(bat_data$weight)/sqrt(length(bat_data$weight)),
diff_from_null = mean(bat_data$weight) - 1.8, # Difference from hypothesized mean
skewness = moments::skewness(bat_data$weight),
kurtosis = moments::kurtosis(bat_data$weight)
)
# Statistical tests
sw_test <- shapiro.test(bat_data$weight)
ad_test <- ad.test(bat_data$weight)
t_test_result <- t.test(bat_data$weight, mu = 1.8)
sign_test <- binom.test(
sum(bat_data$weight > 1.8),
length(bat_data$weight),
p = 0.5,
alternative = "two.sided"
)
Print summary statistics with clear formatting
print("Summary Statistics:")
## [1] "Summary Statistics:"
print(paste("Sample Size (n):", summary_stats$n))
## [1] "Sample Size (n): 15"
print(paste("Sample Mean:", round(summary_stats$mean, 3), "grams"))
## [1] "Sample Mean: 1.647 grams"
print(paste("Sample SD:", round(summary_stats$sd, 3), "grams"))
## [1] "Sample SD: 0.253 grams"
print(paste("Standard Error:", round(summary_stats$se, 3), "grams"))
## [1] "Standard Error: 0.065 grams"
print(paste("Difference from Null:", round(summary_stats$diff_from_null, 3), "grams"))
## [1] "Difference from Null: -0.153 grams"
The sample mean of r round(summary_stats\(mean, 3) grams is noticeably lower than our hypothesized value of 1.8 grams. This difference of r abs(round(summary_stats\)diff_from_null, 3)) grams needs to be evaluated in the context of our sample’s variability.
Create QQ plot
qq_plot <- ggplot(bat_data, aes(sample = weight)) +
stat_qq(color = "#38bdf8") +
stat_qq_line(color = "#f87171") +
labs(title = "Normal Q-Q Plot of Bumblebee Bat Weights",
subtitle = paste("Shapiro-Wilk p-value:", round(sw_test$p.value, 4)),
x = "Theoretical Quantiles",
y = "Sample Quantiles") +
annotate("text", x = -1, y = max(bat_data$weight),
label = paste("Skewness:", round(summary_stats$skewness, 3)),
color = "white", hjust = 0) +
annotate("text", x = -1, y = max(bat_data$weight) - 0.1,
label = paste("Kurtosis:", round(summary_stats$kurtosis, 3)),
color = "white", hjust = 0) +
custom_dark_theme +
theme(plot.title = element_text(hjust = 0.5))
print(qq_plot)
The visual diagnostics suggest:
While not perfectly normal, the data appears to satisfy the normality assumption sufficiently for our t-test given:
Create histogram with density curve
hist_plot <- ggplot(bat_data, aes(x = weight)) +
geom_histogram(aes(y = after_stat(density)), binwidth = 0.1,
fill = "#38bdf8", color = "black", alpha = 0.7) +
geom_density(color = "#f87171", linewidth = 1) +
geom_vline(xintercept = mean(bat_data$weight),
color = "#818cf8", linetype = "dashed", linewidth = 1) +
geom_vline(xintercept = 1.8,
color = "#f87171", linetype = "dashed", linewidth = 1) +
annotate("text", x = mean(bat_data$weight), y = max(density(bat_data$weight)$y),
label = paste("x̄ =", round(mean(bat_data$weight), 3)),
color = "white", hjust = -0.2) +
annotate("text", x = 1.8, y = max(density(bat_data$weight)$y) * 0.9,
label = "H₀: μ = 1.8",
color = "#f87171", hjust = -0.2) +
labs(title = "Distribution of Bumblebee Bat Weights",
subtitle = paste("n =", length(bat_data$weight), ", SD =", round(sd(bat_data$weight), 3)),
x = "Weight (grams)",
y = "Density") +
custom_dark_theme +
theme(plot.title = element_text(hjust = 0.5))
print(hist_plot)
Create t-distribution plot with critical regions
# Calculate t-statistic
t_stat <- (mean(bat_data$weight) - 1.8)/(sd(bat_data$weight)/sqrt(length(bat_data$weight)))
df <- length(bat_data$weight) - 1
t_crit <- qt(0.975, df)
t_vals <- seq(-4, 4, length.out = 1000)
t_dens <- dt(t_vals, df)
t_dist_data <- data.frame(t = t_vals, density = t_dens)
t_plot <- ggplot(t_dist_data, aes(x = t, y = density)) +
geom_line(color = "#38bdf8") +
geom_vline(xintercept = c(-t_crit, t_crit),
color = "#f87171", linetype = "dashed") +
geom_vline(xintercept = t_stat,
color = "#818cf8", linetype = "solid") +
geom_ribbon(data = subset(t_dist_data, t <= -t_crit | t >= t_crit),
aes(ymax = density, ymin = 0),
fill = "#f87171", alpha = 0.2) +
annotate("text", x = t_stat, y = max(t_dens) * 0.8,
label = paste("t =", round(t_stat, 3)),
color = "white") +
annotate("text", x = -t_crit, y = max(t_dens) * 0.6,
label = paste("t_crit = ±", round(t_crit, 3)),
color = "#f87171") +
labs(title = "t-Distribution with Critical Regions",
subtitle = paste("p-value =", round(t_test_result$p.value, 4)),
x = "t-value",
y = "Density") +
custom_dark_theme +
theme(plot.title = element_text(hjust = 0.5))
print(t_plot)
Create enhanced residual plot
residuals <- bat_data$weight - mean(bat_data$weight)
fitted_values <- rep(mean(bat_data$weight), length(bat_data$weight))
residual_plot <- ggplot(data.frame(fitted = fitted_values, residuals = residuals),
aes(x = fitted, y = residuals)) +
geom_point(color = "#38bdf8") +
geom_hline(yintercept = 0, linetype = "dashed", color = "#f87171") +
geom_smooth(method = "loess", color = "#818cf8", se = TRUE, fill = "#818cf8", alpha = 0.2) +
annotate("text", x = mean(fitted_values), y = max(residuals),
label = paste("SD of residuals:", round(sd(residuals), 3)),
color = "white") +
labs(title = "Residual Plot",
subtitle = "Check for patterns",
x = "Fitted Values",
y = "Residuals") +
custom_dark_theme +
theme(plot.title = element_text(hjust = 0.5))
print(residual_plot)
Arrange all plots in a grid
grid.arrange(qq_plot, hist_plot, t_plot, residual_plot,
ncol = 2,
top = grid::textGrob("Bumblebee Bat Weight Analysis",
gp = grid::gpar(col = "white", fontsize = 16)))
Create interactive QQ and histogram plots
qq_interactive <- ggplotly(qq_plot) %>%
layout(
plot_bgcolor = "#222222",
paper_bgcolor = "#222222",
font = list(color = "#FFFFFF"),
xaxis = list(
gridcolor = "#444444",
title = list(
text = "Theoretical Quantiles",
font = list(color = "#FFFFFF", size = 14)
),
tickfont = list(color = "#FFFFFF")
),
yaxis = list(
gridcolor = "#444444",
title = list(
text = "Sample Quantiles",
font = list(color = "#FFFFFF", size = 14)
),
tickfont = list(color = "#FFFFFF")
),
margin = list(l = 80, r = 40, t = 80, b = 80)
)
print(qq_interactive)
hist_interactive <- ggplotly(hist_plot) %>%
layout(
plot_bgcolor = "#222222",
paper_bgcolor = "#222222",
font = list(color = "#FFFFFF"),
xaxis = list(
gridcolor = "#444444",
title = list(
text = "Weight (grams)",
font = list(color = "#FFFFFF", size = 14)
),
tickfont = list(color = "#FFFFFF")
),
yaxis = list(
gridcolor = "#444444",
title = list(
text = "Density",
font = list(color = "#FFFFFF", size = 14)
),
tickfont = list(color = "#FFFFFF")
),
margin = list(l = 80, r = 40, t = 80, b = 80)
)
print(hist_interactive)
Print interactive plots side by side
plotly::subplot(qq_interactive, hist_interactive, nrows = 1, titleX = TRUE, titleY = TRUE, widths = c(0.5, 0.5)) %>%
layout(
plot_bgcolor = "#222222",
paper_bgcolor = "#222222",
font = list(color = "#FFFFFF"),
margin = list(t = 50, b = 50, l = 50, r = 50),
showlegend = FALSE,
title = list(
text = "Interactive Plots of Bumblebee Bat Weights",
font = list(color = "#FFFFFF", size = 16)
)
)
The hypotheses can be written as:
The null hypothesis H₀ states that the population mean weight of bumblebee bats is 1.8 grams. The alternative hypothesis H₁ states that the population mean weight differs from 1.8 grams.
Mathematically, this can be expressed as:
We will use α = 0.05 (two-tailed test) Critical values: t₀.₀₂₅,₁₄ = ±2.145 (from t-distribution with 14 df)
Calculate t-statistic
t_stat <- (mean(bat_data$weight) - 1.8)/(sd(bat_data$weight)/sqrt(length(bat_data$weight)))
Print detailed t-statistic calculation
print(paste("Sample mean:", round(mean(bat_data$weight), 3)))
## [1] "Sample mean: 1.647"
print(paste("Hypothesized mean:", 1.8))
## [1] "Hypothesized mean: 1.8"
print(paste("Standard error:", round(sd(bat_data$weight)/sqrt(length(bat_data$weight)), 4)))
## [1] "Standard error: 0.0654"
print(paste("t-statistic:", round(t_stat, 4)))
## [1] "t-statistic: -2.3457"
Calculate two-tailed p-value
p_value <- 2 * pt(abs(t_stat), df = length(bat_data$weight) - 1, lower.tail = FALSE)
print(paste("p-value:", round(p_value, 4)))
## [1] "p-value: 0.0342"
critical_value <- qt(0.975, df = length(bat_data$weight) - 1)
print(paste("Critical value (±):", round(critical_value, 4)))
## [1] "Critical value (±): 2.1448"
Print Comprehensive Statistical Summary
cat("\nComprehensive Statistical Analysis of Bumblebee Bat Weights\n")
##
## Comprehensive Statistical Analysis of Bumblebee Bat Weights
cat("=======================================================\n\n")
## =======================================================
cat("Basic Summary Statistics:\n")
## Basic Summary Statistics:
cat("-----------------------\n")
## -----------------------
cat("Sample Size:", summary_stats$n, "\n")
## Sample Size: 15
cat("Mean:", round(summary_stats$mean, 3), "grams\n")
## Mean: 1.647 grams
cat("Median:", round(summary_stats$median, 3), "grams\n")
## Median: 1.6 grams
cat("Standard Deviation:", round(summary_stats$sd, 3), "grams\n")
## Standard Deviation: 0.253 grams
cat("Standard Error:", round(summary_stats$se, 3), "grams\n")
## Standard Error: 0.065 grams
cat("Skewness:", round(summary_stats$skewness, 3), "\n")
## Skewness: 0.983
cat("Kurtosis:", round(summary_stats$kurtosis, 3), "\n\n")
## Kurtosis: 4.523
cat("Normality Tests:\n")
## Normality Tests:
cat("---------------\n")
## ---------------
cat("Shapiro-Wilk Test:\n")
## Shapiro-Wilk Test:
cat(" W =", round(sw_test$statistic, 4), "\n")
## W = 0.8844
cat(" p-value =", round(sw_test$p.value, 4), "\n\n")
## p-value = 0.0552
cat("Anderson-Darling Test:\n")
## Anderson-Darling Test:
cat(" A =", round(ad_test$statistic, 4), "\n")
## A = 0.8755
cat(" p-value =", round(ad_test$p.value, 4), "\n\n")
## p-value = 0.0185
cat("One-Sample t-Test Results:\n")
## One-Sample t-Test Results:
cat("------------------------\n")
## ------------------------
cat("H₀: μ = 1.8 grams\n")
## H₀: μ = 1.8 grams
cat("t-statistic:", round(t_test_result$statistic, 3), "\n")
## t-statistic: -2.346
cat("degrees of freedom:", t_test_result$parameter, "\n")
## degrees of freedom: 14
cat("p-value:", round(t_test_result$p.value, 4), "\n")
## p-value: 0.0342
cat("95% CI:", round(t_test_result$conf.int[1], 3), "to",
round(t_test_result$conf.int[2], 3), "grams\n\n")
## 95% CI: 1.506 to 1.787 grams
cat("Sign Test Results:\n")
## Sign Test Results:
cat("----------------\n")
## ----------------
cat("H₀: median = 1.8 grams\n")
## H₀: median = 1.8 grams
cat("Number of values > 1.8:", sign_test$statistic, "\n")
## Number of values > 1.8: 2
cat("p-value:", round(sign_test$p.value, 4), "\n")
## p-value: 0.0074
With |t-statistic| = r round(abs(t_stat), 4) > critical value = r round(critical_value, 4), and p-value = r round(p_value, 4) < α = 0.05, we reject the null hypothesis. There is sufficient evidence to conclude that the true population mean weight of bumblebee bats differs from 1.8 grams.
Calculate 95% confidence interval
t_crit <- qt(0.975, df = length(bat_data$weight) - 1)
margin_error <- t_crit * sd(bat_data$weight)/sqrt(length(bat_data$weight))
ci <- c(mean(bat_data$weight) - margin_error, mean(bat_data$weight) + margin_error)
print(paste("95% Confidence Interval:", round(ci[1], 3), "to", round(ci[2], 3)))
## [1] "95% Confidence Interval: 1.506 to 1.787"
Statistical Conclusion: There is sufficient evidence to conclude that the true population mean weight of bumblebee bats differs from 1.8 grams (p = r round(p_value, 4)). The 95% confidence interval for the true population mean is (r round(ci[1], 3), r round(ci[2], 3)) grams.
Practical Interpretation: The sample data suggests that the hypothesized mean weight of 1.8 grams is plausible for this population. While our sample mean of r round(mean(bat_data$weight), 3) grams differs slightly from 1.8, this difference is not statistically significant. The confidence interval tells us we can be 95% confident that the true population mean falls between r round(ci[1], 3) and r round(ci[2], 3) grams.
Scope of Inference: While our analysis provides valuable insights about bumblebee bat weights, the generalizability of these findings depends on the sampling methodology, which wasn’t specified. If these measurements represent a random sample from the broader population, our conclusions could extend to the species as a whole. However, if the sample was taken from a specific location or time period, our inferences should be limited accordingly.